#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2018/10/20 下午 03:25
# @Author  : wasdzy
# @Site    : 
# @File    : __init__.py.py
# @Software: PyCharm
import requests
from bs4 import BeautifulSoup
import sys
import json
import time, codecs

from code.detail import crawl_detail
from code.db_utils import init_db, insert


def mian():
    """Crawl Lagou python job listings (Chengdu) page by page and store them.

    For each of pages 1..30, POSTs to Lagou's positionAjax endpoint, extracts
    each position's summary fields, fetches its detail page via
    ``crawl_detail`` and persists everything with ``insert``.

    Returns:
        list[dict]: the accumulated position dicts (one per job posting).

    NOTE(review): the function name ``mian`` is a typo for ``main``; it is
    kept unchanged so existing callers keep working.
    """
    # init_db()  # uncomment to (re)initialize the database connection/schema
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
        # Host/Referer/X-* headers are required by Lagou's anti-crawler checks.
        'Host': 'www.lagou.com',
        'Referer': 'https://www.lagou.com/jobs/list_python?labelWords=sug&fromSearch=true&suginput=p',
        'X-Anit-Forge-Code': '0',
        'X-Anit-Forge-Token': None,
        'X-Requested-With': 'XMLHttpRequest',
    }

    positions = []

    # Pages are 1-based on the Lagou API; crawl pages 1..30.
    for page in range(1, 31):
        data = {
            'first': 'true',
            'pn': page,
            'kd': 'python',
        }
        # One request per result page.
        result = requests.post(
            "https://www.lagou.com/jobs/positionAjax.json?city=%E6%88%90%E9%83%BD&needAddtionalResult=false",
            headers=headers, data=data)
        result.encoding = 'utf-8'
        # BUGFIX: the endpoint returns UTF-8 JSON; parse it directly instead of
        # decoding result.content with sys.getfilesystemencoding(), which is
        # wrong on platforms whose filesystem encoding is not UTF-8 (e.g. GBK).
        json_result = result.json()
        page_positions = json_result['content']['positionResult']['result']

        for position in page_positions:
            position_dict = {
                'positionName': position['positionName'],
                'workYear': position['workYear'],
                'salary': position['salary'],
                'district': position['district'],
                'companyFullName': position['companyFullName'],
            }
            # Fetch the full job description for this posting.
            position_id = position['positionId']
            position_dict['position_detal'] = crawl_detail(position_id)
            # Persist one row; timestamp is current epoch time in milliseconds.
            i = insert(position['positionName'], position['workYear'], position['salary'], position['district'],
                       position['companyFullName'], position_dict['position_detal'], int(round(time.time() * 1000)))
            print("插入数据成功: %s" % i)
            # BUGFIX: append inside the loop — previously only the very last
            # position (of the last page) was ever added to the list.
            positions.append(position_dict)
            time.sleep(10)  # throttle between detail fetches to avoid a ban

        # BUGFIX: per-page progress report belongs inside the page loop;
        # it used to run only once, after all pages were done.
        print('第%s页 爬取数据成功' % page)
        time.sleep(10 + page)  # increasing back-off between pages

    return positions

# Script entry point: only crawl when executed directly, not on import.
if __name__ == "__main__":
    mian()
